knitr::opts_chunk$set(echo=TRUE, include=TRUE, message=TRUE,
warning=FALSE, paged.print=FALSE, fig.width = 7)
##Important!! Enter credential information. Include last "/" of the baseurl
##ENTER BASE URL, USERNAME, PASSWORD
baseurl<-params$baseurl
username<-params$username
##Load required packages
required_packages<-c("tidyverse","httr","assertthat","readr","knitr",
"tidyr","jsonlite", "keyring", "lubridate","survival","survminer",
"GGally","ggfortify")
is_installed <- function(mypkg) is.element(mypkg, installed.packages()[,1])
load_or_install<-function(required_packages) {
for(package_name in required_packages) {
if(!is_installed(package_name)) {
install.packages(package_name) }
library(package_name,character.only=TRUE,quietly=TRUE,verbose=FALSE)
}
}
load_or_install(required_packages)
The WHO HIV Case Based surveillance metadata package can be used to capture patient-level data on ART regimens over time, including date of ART initiation and patient status. However, DHIS2 analytics are not advanced enough to build Cox Proportional Hazards models, or survival curves like below. These can be done in R with the survminer and survival packages.
ggsurvplot(
fit = survfit(Surv(time, status) ~ 1, data = lung),
xlab = "Days",
ylab = "Overall survival probability",
title="Generic survival curve")
This demo will show how to download the DHIS2 Event Reports required to build survival curves in an RMarkdown report.
Data have been collected from a demo DHIS2 database that uses the WHO metadata package for HIV case surveillance.
This event report shows the latest Visit stage for all enrollments from the past 5 years and present year.
The variables included are…
Two ways to import the data: download and then read CSV, or import directly through API.
To download the event report, go to Download > CSV
Data from the export could be then read into R for analysis with the readr::read_csv() function.
To import through DHIS2 API, the first thing to do is to enter credentials. On the first time running this script, you will be asked for a password to log-in to the demo system.
Once the keyring package stores and encrypts the password as a local environment variable, you can comment out that password set function Next time you run the script on this machine, you will not need to enter the password.
Using the API in this way allows for more process automation: run the script once, and later you can execute it routinely with a Cron job to access and publish the latest data.
#youll now be prompted for password
# if you hav already set the password with keyring, you can comment this out
# keyring::key_set("Password", username=username)
##test login
loginDHIS2<-function(baseurl,username,password) {
url<-paste0(baseurl,"api/me")
r<-GET(url,authenticate(username,
keyring::key_get("Password", username=username)))
warn_for_status(r, task="log in")
if(r$status_code == 200L){return(TRUE)}
}
if(loginDHIS2(baseurl, username, password)==TRUE){
print("successfully logged in")
}else{
stop("could not log in! Please check url, username and password")
}
## [1] "successfully logged in"
The API request url from the event report is found when going to Download > HTML . Simply change the link’s extension from .html+css to .csv
More details about the DHIS2 API can be found in the DHIS2 Developer Guide
read_csv() will show the columns imported and inferred column types
url<-paste0(baseurl,
"api/29/analytics/enrollments/query/Xh88p1nyefp.csv?", #enrollment query in CSV
"dimension=pe:THIS_YEAR;LAST_5_YEARS", #this year and last 5
"&dimension=ou:USER_ORGUNIT",#user orgunit
"&dimension=Jt68iauILtD", #Gender attribute
"&dimension=ang4CLldbIu.S4IJiirQVHY", # VL test result
"&dimension=ang4CLldbIu.JyeF0Utlhfp:GT:1&", #days since ART > 1 (follow up visit)
"&dimension=ang4CLldbIu.rcUsYgOnlyF", # latest status
"&stage=ang4CLldbIu&displayProperty=NAME", #identify stage and the displayName
"&tableLayout=true&dataIdScheme=NAME", #use name of DE
"&columns=pe;ou;Jt68iauILtD;JyeF0Utlhfp;S4IJiirQVHY;rcUsYgOnlyF", #table layout
"&rows=pe;ou;Jt68iauILtD;JyeF0Utlhfp;S4IJiirQVHY;rcUsYgOnlyF&paging=false")
# function to fetch data from API
dta<-read_csv(httr::content(GET(url)))
##
## -- Column specification --------------------------------------------------------
## cols(
## Enrollment = col_character(),
## `Tracked entity instance` = col_character(),
## `Enrollment date` = col_datetime(format = ""),
## `Incident date` = col_datetime(format = ""),
## Geometry = col_logical(),
## Longitude = col_double(),
## Latitude = col_double(),
## `Organisation unit name` = col_character(),
## `Organisation unit code` = col_character(),
## `Organisation unit` = col_character(),
## `Gender M, F, TG` = col_character(),
## `HIV - Viral load <1000` = col_double(),
## `HIV - Days Since Treatment Start` = col_double(),
## `HIV - Treatment Status` = col_character()
## )
When cleaning the data, observations are considered CENSORED if the latest visit status was On ART or Transferred Out. That means the patient did not die or stop treatment before the end of the observation period.
CS_data<-dta %>%
janitor::clean_names() %>% ## clean variable names
select(contains("enrollment"), contains("gender"),
contains("viral"), contains("treatment")) %>% ## select vars
rename("gender"=3, "vl_suppressed"=4, "days_since_start"=5,"status"=6) %>%
filter(status!="NA") %>% # remove NA#
mutate(vl_suppressed=if_else(is.na(vl_suppressed),0,1)) %>% # replace Na with 0
mutate(censored=if_else(str_detect(status,c("ART|Transfer")), 1, 2))
# censor iff latest status On ART or transfer, otherwise status implies death/ltfu
#show first rows...
head(CS_data) %>%
kable()
| enrollment | enrollment_date | gender | vl_suppressed | days_since_start | status | censored |
|---|---|---|---|---|---|---|
| sN4rsb88a5H | 2020-02-12 | Male | 0 | 414 | On ART | 1 |
| TdDo0b6IZg3 | 2020-10-25 | Female | 0 | 189 | On ART | 1 |
| T7epRIGcG00 | 2021-03-11 | Female | 0 | 29 | On ART | 1 |
| LH2ZsCJZxQv | 2019-12-26 | Female | 0 | 497 | On ART | 1 |
| bBKF1KYului | 2019-08-25 | Transgender | 1 | 584 | On ART | 1 |
| Pu1jPvNWMYy | 2020-01-10 | Transgender | 1 | 455 | On ART | 1 |
##summarize data
CS_data %>%
select(enrollment_date,days_since_start) %>%
# split(.$status) %>%
map(summary)
## $enrollment_date
## Min. 1st Qu. Median
## "2018-12-02 00:00:00" "2019-05-23 00:00:00" "2019-11-06 00:00:00"
## Mean 3rd Qu. Max.
## "2019-11-20 03:47:41" "2020-04-03 00:00:00" "2021-04-23 00:00:00"
##
## $days_since_start
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 10.0 268.0 497.0 480.3 675.0 1027.0
##count by latest status
CS_data %>%
select(vl_suppressed, gender, status) %>%
map(summary.factor)
## $vl_suppressed
## 0 1
## 2287 7326
##
## $gender
## Female Male Transgender
## 5443 3772 398
##
## $status
## Death (documented) Lost to follow up
## 369 335
## On ART Refused (stopped) treatment
## 8159 232
## Transferred out
## 518
## histogram
CS_data %>%
ggplot()+
geom_histogram(aes(days_since_start))+
facet_wrap(~status) + ## histogram by status
labs(title="Histogram by Last Visit Status")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Basic model without covariates, basic plot.
km_fit<-survfit(Surv(days_since_start, censored) ~ 1,
data=CS_data,
type="kaplan-meier")
km_fit
## Call: survfit(formula = Surv(days_since_start, censored) ~ 1, data = CS_data,
## type = "kaplan-meier")
##
## n events median 0.95LCL 0.95UCL
## 9613 936 1027 NA NA
plot(km_fit)
km_fit1<-survfit(Surv(days_since_start, censored) ~ gender,
data=CS_data,
type="kaplan-meier")
km_fit1
## Call: survfit(formula = Surv(days_since_start, censored) ~ gender,
## data = CS_data, type = "kaplan-meier")
##
## n events median 0.95LCL 0.95UCL
## gender=Female 5443 502 1027 NA NA
## gender=Male 3772 396 1027 NA NA
## gender=Transgender 398 38 NA NA NA
# summary(km_fit1)
km_fit2<-survfit(Surv(days_since_start, censored) ~ vl_suppressed,
data=CS_data,
type="kaplan-meier")
km_fit2
## Call: survfit(formula = Surv(days_since_start, censored) ~ vl_suppressed,
## data = CS_data, type = "kaplan-meier")
##
## n events median 0.95LCL 0.95UCL
## vl_suppressed=0 2287 350 NA NA NA
## vl_suppressed=1 7326 586 1027 NA NA
the Kaplan-Meier model is plotted with the survminer R package.
Code adapted from: http://www.sthda.com/english/wiki/survminer-0-3-0
NB! These models are based on case surveillance dummy data and are for demonstration purposes only
# fit <- survfit(Surv(time, status) ~ sex, data = lung)
plotObj<-ggsurvplot(km_fit1, data = CS_data,
title = "HIV Survival on ART: By Gender",
pval = TRUE, pval.method = TRUE, # Add p-value & method name
legend.title = "VL<1000 copies/ml", # Change legend titles
palette = "lancet", # Use JCO journal color palette
risk.table = TRUE, # Add No at risk table
# cumevents = TRUE, # Add cumulative No of events table
tables.height = 0.2, # Specify tables height
tables.theme = theme_cleantable(), # Clean theme for tables
tables.y.text = FALSE, # Hide tables y axis text
xlab = "Days Since Treatment Start",
conf.int = TRUE,
xlim = c(0,750),
ylim = c(0.5,1)
)
# Now save the plot image
# ggsave("survivalplot.png", print(plotObj))
plotObj
# fit <- survfit(Surv(time, status) ~ sex, data = lung)
plotObj<-ggsurvplot(km_fit2, data = CS_data,
title = "HIV Survival on ART",
pval = TRUE, pval.method = TRUE, # Add p-value & method name
legend.title = "", # Change legend titles
palette = "lancet", # Use JCO journal color palette
risk.table = TRUE, # Add No at risk table
# cumevents = TRUE, # Add cumulative No of events table
tables.height = 0.2, # Specify tables height
tables.theme = theme_cleantable(), # Clean theme for tables
tables.y.text = FALSE, # Hide tables y axis text
xlab = "Days Since Treatment Start",
conf.int = TRUE,
xlim = c(0,750),
ylim = c(0.5,1)
)
# Now save the plot image
ggsave("survivalplot.png", print(plotObj))
## Saving 7 x 5 in image
plotObj
survdiff(Surv(days_since_start, censored) ~ gender,
data=CS_data)
## Call:
## survdiff(formula = Surv(days_since_start, censored) ~ gender,
## data = CS_data)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## gender=Female 5443 502 521.4 0.721 1.763
## gender=Male 3772 396 371.5 1.621 2.915
## gender=Transgender 398 38 43.1 0.613 0.656
##
## Chisq= 3.2 on 2 degrees of freedom, p= 0.2
broom::tidy(
coxph(Surv(days_since_start, censored) ~ vl_suppressed,
data = CS_data),
exp = TRUE) %>%
kable()
| term | estimate | std.error | statistic | p.value |
|---|---|---|---|---|
| vl_suppressed | 0.101974 | 0.0713021 | -32.0192 | 0 |
Knitr to turn RMarkdown doc into an HTML webpage, PDF, or slide deckOr… import CSV from DHIS2 into your choice of software e.g. STATA, SPSS
sessionInfo()
## R version 4.0.2 (2020-06-22)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 18363)
##
## Matrix products: default
##
## locale:
## [1] LC_COLLATE=English_United States.1252
## [2] LC_CTYPE=English_United States.1252
## [3] LC_MONETARY=English_United States.1252
## [4] LC_NUMERIC=C
## [5] LC_TIME=English_United States.1252
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] ggfortify_0.4.11 GGally_2.1.1 survminer_0.4.9 ggpubr_0.4.0
## [5] survival_3.2-7 lubridate_1.7.9.2 keyring_1.1.0 jsonlite_1.7.2
## [9] knitr_1.31 assertthat_0.2.1 httr_1.4.2 forcats_0.5.1
## [13] stringr_1.4.0 dplyr_1.0.3 purrr_0.3.4 readr_1.4.0
## [17] tidyr_1.1.2 tibble_3.0.5 ggplot2_3.3.3 tidyverse_1.3.0
##
## loaded via a namespace (and not attached):
## [1] ggtext_0.1.1 fs_1.5.0 RColorBrewer_1.1-2 ggsci_2.9
## [5] tools_4.0.2 backports_1.2.1 utf8_1.2.1 R6_2.5.0
## [9] DBI_1.1.1 colorspace_2.0-0 withr_2.4.2 tidyselect_1.1.1
## [13] gridExtra_2.3 curl_4.3 compiler_4.0.2 cli_2.5.0
## [17] rvest_0.3.6 xml2_1.3.2 labeling_0.4.2 scales_1.1.1
## [21] survMisc_0.5.5 digest_0.6.27 foreign_0.8-81 rmarkdown_2.6
## [25] rio_0.5.26 pkgconfig_2.0.3 htmltools_0.5.1.1 highr_0.9
## [29] dbplyr_2.0.0 rlang_0.4.10 readxl_1.3.1 rstudioapi_0.13
## [33] generics_0.1.0 farver_2.1.0 zoo_1.8-8 zip_2.1.1
## [37] car_3.0-10 magrittr_2.0.1 Matrix_1.2-18 Rcpp_1.0.6
## [41] munsell_0.5.0 fansi_0.4.2 abind_1.4-5 lifecycle_1.0.0
## [45] stringi_1.5.3 yaml_2.2.1 snakecase_0.11.0 carData_3.0-4
## [49] plyr_1.8.6 grid_4.0.2 crayon_1.4.1 lattice_0.20-41
## [53] haven_2.3.1 splines_4.0.2 gridtext_0.1.4 hms_1.0.0
## [57] pillar_1.6.0 markdown_1.1 ggsignif_0.6.1 reprex_1.0.0
## [61] glue_1.4.2 evaluate_0.14 data.table_1.13.6 modelr_0.1.8
## [65] vctrs_0.3.6 cellranger_1.1.0 gtable_0.3.0 reshape_0.8.8
## [69] km.ci_0.5-2 xfun_0.20 openxlsx_4.2.3 janitor_2.1.0
## [73] xtable_1.8-4 broom_0.7.6 rstatix_0.7.0 KMsurv_0.1-5
## [77] ellipsis_0.3.1